Forfeiting the founder effect: turnover defines biofilm community succession

PNNL, Spring 2018

About this document

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

This project makes use of many packages, especially Phyloseq https://joey711.github.io/phyloseq/.

The goal of this document it to provide a comprehensive overview of all methods used in the paper for visualizing amplicon data.

Setup:

library("knitr")
library(checkpoint)
checkpoint("2017-09-01", use.knitr = T)
library("rmarkdown")
library("formatR")
library("ggplot2")
library("phyloseq")
# Checkpoint can't install phyloseq, so install like this:
#source('http://bioconductor.org/biocLite.R'); biocLite('phyloseq',suppressUpdates = T)
library("RColorBrewer")
library("viridis")
library("scales")
library("cowplot")
library("vegan")
library("dplyr")
library("multcomp")
library("reshape2")
library("picante")
library("betapart")
#library("parallel")
library("tidyr")
library("broom")


#('phyloseq')
theme_set(theme_bw() + theme(strip.background = element_blank()))
set.seed(711)
knitr::opts_chunk$set(cache=TRUE)

Import data

Our work here will focus on two amplicon data sets. One from the 16S gene, the other from the 18S gene. Because there amplicons came from different primers and are expected to target different genes (of different evolutionary history, length, complexity, etc) they will be analyzed separately. (This also makes the stats simpler.)

meta <- import_qiime_sample_data(file.path('../data/metadata.txt'))


no_meta <- import_biom(file.path('../data/16S_otus_vsearch/otu_table_w_tax_silva.biom'),
                       file.path('../data/16S_otus_vsearch/rep_set.tre'))
full16s <- merge_phyloseq(meta, no_meta)
full16s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3234 taxa and 91 samples ]
## sample_data() Sample Data:       [ 91 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 3234 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 3234 tips and 3232 internal nodes ]
no_meta_18s <- import_biom(file.path('../data/18S_otus_vsearch/otu_table_w_tax_silva.biom'),
                           file.path('../data/18S_otus_vsearch/rep_set.tre'))
full18s <- merge_phyloseq(meta, no_meta_18s)
full18s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2861 taxa and 77 samples ]
## sample_data() Sample Data:       [ 77 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 2861 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2861 tips and 2859 internal nodes ]

Explore metadata

Let’s take a look at columns in the metadata file. Also fix strange columns.

# Let's use 16S, as it's our main focus, and has more samples.
meta <- sample_data(full16s)
meta.n_unique <- rapply(meta, function(x) length(table(x)))
# The pairwise interesting factors
meta.n_unique[meta.n_unique == 2]
## named integer(0)
# Other potentially interesting factors
meta.n_unique[meta.n_unique > 2 & meta.n_unique < max(meta.n_unique)]
##       Day Timepoint 
##         6         6
sample_data(full16s)$Day <- factor(sample_data(full16s)$Day)
sample_data(full18s)$Day <- factor(sample_data(full18s)$Day)

Preprocess

Remove all non-bacteria microbes, along with off-target effects of the 16S primers, like OTUs annotated as chloroplasts or mitochondria.

Rarefy and select Days 8-79 as a cohort for downstream analysis. The initial day of sampling had very little biomass on the slides, and very few samples were successfully sequenced. Day 8 will be the initial day analyzed.

16S

filtered16s <- subset_taxa(full16s, Rank1 == "D_0__Bacteria")
ntaxa(filtered16s) / ntaxa(full16s)
## [1] 0.9094001
sum(taxa_sums(filtered16s)) / sum(taxa_sums(full16s))
## [1] 0.9961169
# What taxa are being removed in this filter?
full16s %>% subset_taxa(Rank1 != "D_0__Bacteria") %>% tax_table %>% data.frame %>% select_("Rank1") %>% table
## .
## D_0__Archaea   Unassigned 
##          102          191
# Mostly Archaea and Unassigned microbes.

filtered16s <- subset_taxa(filtered16s, Rank5 != "D_4__Mitochondria")
ntaxa(filtered16s) / ntaxa(full16s) # about 4% are mirochondria
## [1] 0.8574521
sum(taxa_sums(filtered16s)) / sum(taxa_sums(full16s))
## [1] 0.9404225
# Just to be sure, let's also filter at the class level.
filtered16s <- subset_taxa(filtered16s, Rank4 != "D_3__Chloroplast")
ntaxa(filtered16s) / ntaxa(full16s) # another 2% are chloroplast
## [1] 0.8345702
sum(taxa_sums(filtered16s)) / sum(taxa_sums(full16s))
## [1] 0.6990603
# Lot's of c__Chloroplast in these samples!

plot(sort(sample_sums(filtered16s)))

#ggplot(sample_data(filtered16s), aes(sample_sums(filtered16s))) + geom_histogram(binwidth = 10000, aes(fill=Day))

sort(sample_sums(filtered16s))[1:30]
##     T6.12     T4.13   T1..2.7 T1..14.19     T3.11      T4.5     T4.10 
##         0         1         1         1         1         2         4 
##     T5.16     T3.19      T3.3     T6.15      T3.5     T4.20     T3.18 
##         4        11        15        19        28        36       307 
##      T1.1      T2.1     T4.18      T5.5      T3.7     T6.14      T5.8 
##       356       937      2239      7721      8090     13168     13978 
##      T5.3      T5.6     T6.11     T3.20      T3.2      T6.6     T6.18 
##     17533     18996     20605     20792     26188     29167     29539 
##     T6.17      T4.3 
##     29569     31461
set.seed(711)
rar.16s <- rarefy_even_depth(filtered16s, sample.size = 13000, replace = F)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 19 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## T4.18T4.10T4.13T5.5T1..2.7   
## ...
## 383OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
rar.16s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2316 taxa and 72 samples ]
## sample_data() Sample Data:       [ 72 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 2316 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2316 tips and 2314 internal nodes ]
rar.16s.cohort <- subset_samples(rar.16s, Day %in% c(8,14,35,56,79))
rar.16s.cohort
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2316 taxa and 71 samples ]
## sample_data() Sample Data:       [ 71 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 2316 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2316 tips and 2314 internal nodes ]

18S

18S is handled similarly. A different set of off-target effects for 18S primers are used.

filtered18s <- subset_taxa(full18s, Rank1 %in% c("D_0__Archaea", "D_0__Eukaryota"))
ntaxa(filtered18s) / ntaxa(full18s) # these 18S primers amplify lots of bacteria, but that's expected
## [1] 0.3477805
sum(taxa_sums(filtered18s)) / sum(taxa_sums(full18s))
## [1] 0.4691395
# Now that we are using a unified database, also filter Mitochondria and
# chloroplasts on the 18S amplicons

filtered18s <- subset_taxa(filtered18s, Rank5 != "D_4__Mitochondria")
ntaxa(filtered18s) / ntaxa(full18s) # more than 25% of OTUs are mitochondria
## [1] 0.2610975
sum(taxa_sums(filtered18s)) / sum(taxa_sums(full18s)) # but < 4% of reads
## [1] 0.4633287
filtered18s <- subset_taxa(filtered18s, Rank4 != "D_3__Chloroplast")
ntaxa(filtered18s) / ntaxa(full18s) # < 0.1% are chloroplast
## [1] 0.2610975
sum(taxa_sums(filtered18s)) / sum(taxa_sums(full18s))
## [1] 0.4633287
# Remove 'tomatoes' (actually from two types of bacteria).

# badTaxa <- full18s %>% subset_taxa(Rank7 == "D_12__Solanum lycopersicum (tomato)") %>% taxa_names
# allTaxa <- taxa_names(full18s)
# keepTaxa <- allTaxa[!(allTaxa %in% badTaxa)]

# filtered18s <- prune_taxa(keepTaxa, full18s)

# ntaxa(filtered18s) / ntaxa(full18s)
# sum(taxa_sums(filtered18s)) / sum(taxa_sums(full18s))


plot(sort(sample_sums(filtered18s)))

#ggplot(sample_data(filtered18s), aes(sample_sums(filtered18s))) + geom_histogram(binwidth = 10000, aes(fill=Day))

sort(sample_sums(filtered18s))[1:30]
##      T3.9 T1..14.19      T1.1     T6.20     T6.18     T6.19      T3.6 
##         0        59       136       256       367      1938      3427 
##      T4.5      T6.6     T6.14      T2.1   T2..2.5      T3.2     T3.13 
##     12311     13303     13789     14176     18545     19463     21461 
##     T4.16   T1..2.7      T3.1     T3.10     T3.14      T5.3     T3.20 
##     22137     24512     25265     25838     27411     29289     31585 
##     T3.12      T4.9  T2..6.10      T3.8     T6.15     T4.10      T4.2 
##     31986     35767     36729     36822     40349     41548     43726 
##     T4.18      T4.8 
##     46149     46568
set.seed(711)
rar.18s <- rarefy_even_depth(filtered18s, sample.size = 13000, replace = F)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 8 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## T6.19T6.20T6.18T4.5T1..14.19 
## ...
## 64OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
rar.18s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 683 taxa and 69 samples ]
## sample_data() Sample Data:       [ 69 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 683 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 683 tips and 682 internal nodes ]
rar.18s.cohort <- subset_samples(rar.18s, Day %in% c(8,14,35,56,79))
rar.18s.cohort
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 683 taxa and 68 samples ]
## sample_data() Sample Data:       [ 68 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 683 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 683 tips and 682 internal nodes ]

Cohorts used in downstream analysis

The main feature is Day, with most samples from later days.

Looks like we lost samples throughout. Mostly the loss near the start is more noticeable.

Taxa counts

How many Phyla and OTUs are in the initial Day 8 samples?

rar.16s.cohort %>% subset_samples(Day == "8") %>% filter_taxa(function(x){max(x) > 0 }, prune = T) %>% tax_glom("Rank2") %>% ntaxa
## [1] 33
rar.18s.cohort %>% subset_samples(Day == "8") %>% filter_taxa(function(x){max(x) > 0 }, prune = T) %>% tax_glom("Rank3") %>% ntaxa
## [1] 18
rar.16s.cohort %>% subset_samples(Day == "8") %>% filter_taxa(function(x){max(x) > 0 }, prune = T) %>% ntaxa
## [1] 999
rar.18s.cohort %>% subset_samples(Day == "8") %>% filter_taxa(function(x){max(x) > 0 }, prune = T) %>% ntaxa
## [1] 291

Analysis and Graphs

I’ll use rarefied data, for consistency.

Beta Diversity

Beta Diversity was investigated, but not included in final paper.

PyNAST alignments to the Greengenes and Silva database were used for both 16S and 18S data, respectively. ML Tree was built using FastTree2.

Viridis is in vogue, and looks pretty good here.

bray.16s = ordinate(rar.16s.cohort, method = "PCoA", distance = "bray")
bray.18s = ordinate(rar.18s.cohort, method = "PCoA", distance = "bray")

plot_ordination(rar.16s.cohort, bray.16s, color="Day") + v

plot_ordination(rar.18s.cohort, bray.18s, color="Day") + pl

Alpha diversity

metrics <- c("Observed", "SimpsonE")
rich.16s <- estimate_richness_mod(rar.16s.cohort, measures = metrics)
rich.18s <- estimate_richness_mod(rar.18s.cohort, measures = metrics)

# Rename for graphing
graphnames <- c("Richness", "Evenness")
names(rich.16s) <- graphnames
names(rich.18s) <- graphnames


# merge richness with metadata
DF.16s <- merge(rich.16s, sample_data(rar.16s.cohort), by = 0)
DF.18s <- merge(rich.18s, sample_data(rar.18s.cohort), by = 0)

# melt for graphing
reshapevars <- c("Richness", "Evenness")
mdf.16s = reshape2::melt(DF.16s, measure.vars = graphnames)
mdf.18s = reshape2::melt(DF.18s, measure.vars = graphnames)


alpha.16s.alt <- ggplot(mdf.16s, aes(Day, value, color = Day))
alpha.16s.alt <- alpha.16s.alt +
  geom_boxplot(color = "gray", outlier.size = 0) +
  geom_jitter(width = 0.2) +
  facet_grid(facets = variable~., scales = "free_y", switch = "y") +
  labs(x = "Sampling Day", title = "16S OTUs") +
  v + theme(legend.position = "none",
            strip.background = element_blank(),
            axis.title.y = element_blank(),
            strip.placement = "outside"
            #, plot.margin=unit(c(5,5,-25,5), units = "pt")
            )
alpha.16s.alt

alpha.18s.alt <- ggplot(mdf.18s, aes(Day, value, color = Day))
alpha.18s.alt <- alpha.18s.alt +
  geom_boxplot(color = "gray", outlier.size = 0) +
  geom_jitter(width = 0.2) +
  facet_grid(facets = variable~., scales = "free_y", switch = "y") +
  labs(x = "Sampling Day", title = "18S OTUs") +
  pl + theme(legend.position = "none",
            strip.background = element_blank(),
            axis.title.y = element_blank(),
            strip.text.y = element_blank()
            #axis.text.y = element_blank()
            #, plot.margin=unit(c(5,5,-25,5), units = "pt")
            )
alpha.18s.alt

alpha.both.alt <- plot_grid(alpha.16s.alt, alpha.18s.alt,
                        #labels = c("A", "B"),
                        align = "h", nrow = 1, rel_widths = c(1.1, 1))
alpha.both.alt

ggsave("./figures/alpha.both.pdf", alpha.both.alt, scale = 1.3, width = 89, height = 77, units = "mm")

Stat test for alpha diversity

Pairwise comparison between groups using multcomp::glht()

16S

# Simple means
DF.16s %>% group_by(Day) %>% summarize(meanRich = mean(Richness), meanEven = mean(Evenness))
## # A tibble: 5 x 3
##      Day meanRich   meanEven
##   <fctr>    <dbl>      <dbl>
## 1      8 549.0000 0.02287873
## 2     14 581.5714 0.02722612
## 3     35 580.8235 0.02386853
## 4     56 222.5556 0.01221119
## 5     79 225.6111 0.03012419
1 - 233.3889 / 557.7500 # richness decrease between Day 8 and 79
## [1] 0.5815528
0.02940754 / 0.02391469 - 1 # Evenness increase between Day 35 and 79
## [1] 0.2296852
#DF.16s %>% ggplot(aes(x = Richness)) + geom_dotplot() + facet_grid(Day~.)
DF.16s %>% glm(Richness ~ Day, family = "poisson", data = .) %>%
  glht(linfct = mcp(Day = "Tukey")) %>% summary %>% tidy %>% kable
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
lhs rhs estimate std.error statistic p.value
14 - 8 0 0.0576354 0.0240457 2.3969140 0.1109000
35 - 8 0 0.0563485 0.0235934 2.3883156 0.1136119
56 - 8 0 -0.9029217 0.0265518 -34.0060465 0.0000000
79 - 8 0 -0.8892857 0.0264881 -33.5730690 0.0000000
35 - 14 0 -0.0012868 0.0149698 -0.0859612 0.9999871
56 - 14 0 -0.9605570 0.0192988 -49.7728330 0.0000000
79 - 14 0 -0.9469210 0.0192110 -49.2904530 0.0000000
56 - 35 0 -0.9592702 0.0187323 -51.2093035 0.0000000
79 - 35 0 -0.9456342 0.0186419 -50.7262867 0.0000000
79 - 56 0 0.0136360 0.0222681 0.6123560 0.9720225
#DF.16s %>% ggplot(aes(x = Evenness)) + geom_dotplot() + facet_grid(Day~.)
DF.16s %>% glm(Evenness ~ Day, family = "Gamma", data = .) %>%
  glht(linfct = mcp(Day = "Tukey")) %>% summary %>% tidy %>% kable
lhs rhs estimate std.error statistic p.value
14 - 8 0 -6.979274 7.436823 -0.9384752 0.8733075
35 - 8 0 -1.812552 7.481335 -0.2422766 0.9991682
56 - 8 0 38.183396 9.051039 4.2186753 0.0002393
79 - 8 0 -10.512806 7.205576 -1.4589821 0.5699695
35 - 14 0 5.166722 4.385669 1.1780920 0.7495326
56 - 14 0 45.162670 6.721982 6.7186542 0.0000000
79 - 14 0 -3.533532 3.896671 -0.9068078 0.8865641
56 - 35 0 39.995948 6.771194 5.9067793 0.0000000
79 - 35 0 -8.700254 3.980964 -2.1854640 0.1716796
79 - 56 0 -48.696202 6.465216 -7.5320301 0.0000000
# overall median evenness
DF.16s %>% summarize(medRich = median(Richness), medEven = median(Evenness))
##   medRich    medEven
## 1     393 0.02181529

18S

# Simple means
DF.18s %>% group_by(Day) %>% summarize(meanRich = mean(Richness), meanEven = mean(Evenness))
## # A tibble: 5 x 3
##      Day  meanRich   meanEven
##   <fctr>     <dbl>      <dbl>
## 1      8 117.75000 0.02932658
## 2     14 140.00000 0.02368217
## 3     35 133.05556 0.03147696
## 4     56  91.88235 0.02790451
## 5     79  88.15385 0.03015231
227.0000 / 372.7500 # fraction of OTUs remaining on day 79 (vs day 8)
## [1] 0.6089873
1 - 227.0000 / 372.7500 # fraction of OTUs lost on day 79 (vs day 8)
## [1] 0.3910127
#DF.18s %>% ggplot(aes(x = Richness)) + geom_dotplot() + facet_grid(Day~.)
DF.18s %>% glm(Richness ~ Day, family = "poisson", data = .) %>%
  glht(linfct = mcp(Day = "Tukey")) %>% summary %>% tidy %>% kable
lhs rhs estimate std.error statistic p.value
14 - 8 0 0.1730787 0.0506909 3.414396 0.0055153
35 - 8 0 0.1222030 0.0504051 2.424419 0.1037303
56 - 8 0 -0.2480547 0.0525675 -4.718787 0.0000195
79 - 8 0 -0.2894802 0.0547333 -5.288922 0.0000006
35 - 14 0 -0.0508757 0.0293933 -1.730861 0.4033461
56 - 14 0 -0.4211334 0.0329641 -12.775509 0.0000000
79 - 14 0 -0.4625589 0.0363184 -12.736200 0.0000000
56 - 35 0 -0.3702578 0.0325229 -11.384514 0.0000000
79 - 35 0 -0.4116832 0.0359185 -11.461602 0.0000000
79 - 56 0 -0.0414254 0.0388948 -1.065064 0.8177682
#DF.18s %>% ggplot(aes(x = Evenness)) + geom_dotplot() + facet_grid(Day~.)
DF.18s %>% glm(Evenness ~ Day, family = "Gamma", data = .) %>%
  glht(linfct = mcp(Day = "Tukey")) %>% summary %>% tidy %>% kable
lhs rhs estimate std.error statistic p.value
14 - 8 0 8.1270876 6.084522 1.3356986 0.6595859
35 - 8 0 -2.3295001 5.650136 -0.4122910 0.9936631
56 - 8 0 1.7377377 5.806619 0.2992684 0.9981726
79 - 8 0 -0.9338065 5.878039 -0.1588636 0.9998504
35 - 14 0 -10.4565876 3.927071 -2.6626937 0.0565265
56 - 14 0 -6.3893498 4.149057 -1.5399521 0.5257827
79 - 14 0 -9.0608940 4.248435 -2.1327606 0.1984448
56 - 35 0 4.0672378 3.480991 1.1684136 0.7620463
79 - 35 0 1.3956936 3.598864 0.3878151 0.9949921
79 - 56 0 -2.6715442 3.839871 -0.6957380 0.9556500
# overall median evenness
DF.18s %>% summarize(medRich = median(Richness), medEven = median(Evenness))
##   medRich    medEven
## 1     116 0.02848502

Abundance plots

There will be a matching set of relative abundance plots, for each amplicon type.

  1. Area plot at Phylum level of most common taxa. Samples merged by day.

  2. Rank abundance curve of common genera in day 8 and day 79. This will show variance of the separate genera and help visualize the selection threshold for defining a ‘founders species.’

  3. Line plot of selected genera above that threshold, highlighting ‘founders species.’ It will not show variance of each genera; variance is already shown on 2. and additional information would muddy the graph.

When initially proposed, we called 2. a ‘scree plot.’ We now use the more accurate term ‘rank abundance curve.’ (A scree plots is a rank abundance curve used to show the decreasing eigenvalues of an ordinations.) While our documentation uses the correct term, ‘scree’ is still used in the code.

16S RA 1: phylum

# Merge by day
cohort.merged <- merge_samples(rar.16s.cohort, "Day")

# Fix mangled metadata
sample_data(cohort.merged)$Day <- as.numeric(sample_names(cohort.merged))

# Glom OTUs by Phyla and transform to RA
glom <- tax_glom(cohort.merged, taxrank = "Rank2")
glom <- transform_sample_counts(glom, function(x) x / sum(x))

prune_taxa(names(sort(taxa_sums(glom), TRUE))[0:11], glom) %>% psmelt() %>%
  group_by(OTU, Rank2) %>% summarize(mean = mean(Abundance)) %>% arrange(-mean) %>%
  kable()
OTU Rank2 mean
OTU_4 D_1__Proteobacteria 0.4806892
OTU_5 D_1__Cyanobacteria 0.3220583
OTU_9 D_1__Bacteroidetes 0.0841774
OTU_31 D_1__Actinobacteria 0.0298159
OTU_12 D_1__Planctomycetes 0.0253331
OTU_36 D_1__Firmicutes 0.0176987
OTU_21 D_1__Chloroflexi 0.0128887
OTU_37 D_1__Spirochaetes 0.0117361
OTU_50 D_1__Verrucomicrobia 0.0041929
OTU_42 D_1__Chlamydiae 0.0026787
OTU_90 D_1__Patescibacteria 0.0020768
# Select top Phyla
glom.top <- prune_taxa(names(sort(taxa_sums(glom), TRUE))[0:7], glom)
sum(taxa_sums(glom.top)) / sum(taxa_sums(glom))
## [1] 0.9726613
# Melt for graphing and strip prefix
glom.top.melt <- psmelt(glom.top)
glom.top.melt <- arrange(glom.top.melt, Day, Rank2)
glom.top.melt$Phylum <- gsub('D_1__', '', glom.top.melt$Rank2)

# Graph
glom.top.gg <- ggplot(glom.top.melt, aes(x = as.numeric(Day), y = Abundance, fill = Phylum))
part1 <- glom.top.gg + geom_area() +
  v.fill +
  labs(fill = "Bacteria", y = "Abundance") +
  theme(plot.margin=unit(c(5,5,-25,5), units = "pt"), legend.justification = 'left') +
  scale_x_continuous("Sampling Day", c(8,14,35,56,79), NULL)
part1

18S RA 1: phylum

# Merge by day
cohort.merged18s <- merge_samples(rar.18s.cohort, "Day")

# Fix mangled metadata
sample_data(cohort.merged18s)$Day <- as.numeric(sample_names(cohort.merged18s))

# Glom OTUs by Phyla and transform to RA
glom.18s <- tax_glom(cohort.merged18s, taxrank = "Rank3")
glom.18s <- transform_sample_counts(glom.18s, function(x) x / sum(x))

prune_taxa(names(sort(taxa_sums(glom.18s), TRUE))[0:11], glom.18s) %>% psmelt() %>%
  group_by(OTU, Rank2, Rank3) %>% summarize(mean = mean(Abundance)) %>% arrange(-mean) %>%
  kable()
OTU Rank2 Rank3 mean
OTU_2 D_1__SAR D_2__Stramenopiles 0.7419579
OTU_12 D_1__Opisthokonta D_2__Holozoa 0.1474057
OTU_6 D_1__SAR D_2__Rhizaria 0.0881805
OTU_63 D_1__SAR D_2__Alveolata 0.0058577
OTU_64 D_1__Archaeplastida D_2__Chloroplastida 0.0057623
OTU_122 D_1__Amoebozoa D_2__Discosea 0.0040587
OTU_92 D_1__Nanoarchaeaeota D_2__Woesearchaeia 0.0019226
OTU_229 D_1__Opisthokonta D_2__Nucletmycea 0.0014938
OTU_117 D_1__Excavata D_2__Discoba 0.0012650
OTU_202 D_1__Euryarchaeota D_2__Halobacteria 0.0010275
OTU_166 D_1__Amoebozoa D_2__Tubulinea 0.0006229
# Select top Phyla
glom.18s.top <- prune_taxa(names(sort(taxa_sums(glom.18s), TRUE))[0:7], glom.18s)
sum(taxa_sums(glom.18s.top)) / sum(taxa_sums(glom.18s))
## [1] 0.9951453
# Melt for graphing and strip prefix
glom.18s.top.melt <- psmelt(glom.18s.top)
glom.18s.top.melt <- arrange(glom.18s.top.melt, Day, Rank3)

glom.18s.top.melt$Phylum <- gsub('D_.__', '', glom.18s.top.melt$Rank3)
glom.18s.top.melt$Phylum <- gsub('D_..__', '', glom.18s.top.melt$Phylum)

# Graph
glom.18s.top.gg <- ggplot(glom.18s.top.melt, aes(x = as.numeric(Day), y = Abundance, fill = Phylum))
part1.18s.Rank3 <- glom.18s.top.gg + geom_area() +
  pl.fill +
  labs(fill="Eukaryotes", y = "Abundance") +
  theme(plot.margin=unit(c(0,5,5,5), units = "pt"), legend.justification = 'left') +
  scale_x_continuous("Sampling Day", c(8,14,35,56,79), NULL)
part1.18s.Rank3

# Merge by day
cohort.merged18s <- merge_samples(rar.18s.cohort, "Day")

# Fix mangled metadata
sample_data(cohort.merged18s)$Day <- as.numeric(sample_names(cohort.merged18s))

# Glom OTUs by Phyla and transform to RA
glom.18s <- tax_glom(cohort.merged18s, taxrank = "Rank2")
glom.18s <- transform_sample_counts(glom.18s, function(x) x / sum(x))

prune_taxa(names(sort(taxa_sums(glom.18s), TRUE))[0:11], glom.18s) %>% psmelt() %>%
  group_by(OTU, Rank2) %>% summarize(total = sum(Abundance)) %>% arrange(-total) %>%
  kable()
OTU Rank2 total
OTU_2 D_1__SAR 4.1799803
OTU_12 D_1__Opisthokonta 0.7447283
OTU_64 D_1__Archaeplastida 0.0288117
OTU_122 D_1__Amoebozoa 0.0234287
OTU_92 D_1__Nanoarchaeaeota 0.0096128
OTU_117 D_1__Excavata 0.0067082
OTU_202 D_1__Euryarchaeota 0.0064257
OTU_828 D_1__Centrohelida 0.0001669
OTU_2776 D_1__Cryptophyceae 0.0000583
OTU_2115 D_1__Incertae Sedis 0.0000438
OTU_1246 D_1__Haptophyta 0.0000262
# Select top Phyla
glom.18s.top <- prune_taxa(names(sort(taxa_sums(glom.18s), TRUE))[0:7], glom.18s)
sum(taxa_sums(glom.18s.top)) / sum(taxa_sums(glom.18s))
## [1] 0.9999391
# Melt for graphing and strip prefix
glom.18s.top.melt <- psmelt(glom.18s.top)
glom.18s.top.melt <- arrange(glom.18s.top.melt, Day, Rank2)

glom.18s.top.melt$Phylum <- gsub('D_.__', '', glom.18s.top.melt$Rank2)
glom.18s.top.melt$Phylum <- gsub('D_..__', '', glom.18s.top.melt$Phylum)

# Graph
glom.18s.top.gg <- ggplot(glom.18s.top.melt, aes(x = as.numeric(Day), y = Abundance, fill = Phylum))
part1.18s.Rank2 <- glom.18s.top.gg + geom_area() +
  pl.fill +
  labs(fill="Eukaryotes", y = "Abundance") +
  theme(plot.margin=unit(c(0,5,5,5), units = "pt"), legend.justification = 'left') +
  scale_x_continuous("Sampling Day", c(8,14,35,56,79), NULL)
part1.18s.Rank2

phylum.both.rank3 <- plot_grid(part1, part1.18s.Rank3, rel_heights = c(1,1.2),
                        #labels = c("A", "B"),
                        align = "v", nrow = 2)
phylum.both.rank3

ggsave("./figures/phylum.both.Rank3.pdf", phylum.both.rank3, scale = 1.3, width = 89, height = 89, units = "mm")

phylum.both.rank2 <- plot_grid(part1, part1.18s.Rank2, rel_heights = c(1,1.2),
                        #labels = c("A", "B"),
                        align = "v", nrow = 2)
phylum.both.rank2

ggsave("./figures/phylum.both.Rank2.pdf", phylum.both.rank2, scale = 1.3, width = 89, height = 89, units = "mm")
# combined.fig1 <-
#   plot_grid(alpha.both.alt, phylum.both, labels = c("A", "B"),
#             align = "h", nrow = 1, rel_widths = c(1.1, 1), scale = c(1,1))
# ggsave("./figures/fig1.pdf", combined.fig1, scale = 1.4, width = 178, height = 77, units = "mm")
#
# combined.wide.fig1 <-
#   plot_grid(alpha.both.alt, phylum.both, labels = c("A", "B"),
#             align = "v", ncol = 1, rel_heights = c(1, 1.2), scale = c(1,1))
# ggsave("./figures/fig1-wide.pdf", combined.wide.fig1, scale = 1.3, width = 89, height = 160, units = "mm")
#

# new all silva graphs
combined.tall.fig1.Rank2 <-
  plot_grid(alpha.both.alt, phylum.both.rank2, labels = c("A", "B"),
            align = "v", ncol = 1, rel_heights = c(1, 1.2), scale = c(1,1))
ggsave("./figures/fig1-tall-Rank2.pdf", combined.tall.fig1.Rank2, scale = 1.3, width = 89, height = 160, units = "mm")

combined.tall.fig1.Rank3 <-
  plot_grid(alpha.both.alt, phylum.both.rank3, labels = c("A", "B"),
            align = "v", ncol = 1, rel_heights = c(1, 1.2), scale = c(1,1))
ggsave("./figures/fig1-tall-Rank3.pdf", combined.tall.fig1.Rank3, scale = 1.3, width = 89, height = 160, units = "mm")
#Save and exit.
knitr::knit_exit()

Function to enhance a psmelt data.frame

These functions are specific to this data set and are not intended for general use.

clean_gg_tax() strip GreenGenes prefixes from taxonomy strings.

clean_silva_tax() strip SILVA prefixes from taxonomy strings.

aggregate_tax_by_day() aggregate(Abundance ~ Day + Taxonomy) and add stats used for box plots.

clean_gg_tax <- function(pmelted){
  pmelted$Taxonomy <- paste(pmelted$Rank4,
                            pmelted$Rank5,
                            pmelted$Rank6, sep = ", ")
  pmelted$Taxonomy <- gsub('.__', '', pmelted$Taxonomy)
  pmelted$Taxonomy <- gsub(' ,', '', pmelted$Taxonomy)

  pmelted$Kingdom <- gsub('.__', '', pmelted$Rank1)
  pmelted$Phylum <- gsub('.__', '', pmelted$Rank2)
  pmelted$Class <- gsub('.__', '', pmelted$Rank3)
  pmelted$Order <- gsub('.__', '', pmelted$Rank4)
  pmelted$Family <- gsub('.__', '', pmelted$Rank5)
  pmelted$Genus <- gsub('.__', '', pmelted$Rank6)

  return(pmelted)
}

clean_silva_tax <- function(pmelted){
  pmelted$Taxonomy <- paste(pmelted$Rank4,
                            pmelted$Rank5,
                            pmelted$Rank6, sep = ", ")
  pmelted$Taxonomy <- gsub('D_.__', '', pmelted$Taxonomy)
  pmelted$Taxonomy <- gsub('D_..__', '', pmelted$Taxonomy)
  pmelted$Taxonomy <- gsub(' ,', '', pmelted$Taxonomy)

  pmelted$Kingdom <- gsub('D_.__', '', pmelted$Rank1)  #D_0
  pmelted$Phylum <- gsub('D_.__', '', pmelted$Rank2) #D_1
  pmelted$Class <- gsub('D_.__', '', pmelted$Rank3)  #D_2
  pmelted$Order <- gsub('D_.__', '', pmelted$Rank4)   #D_3
  pmelted$Family <- gsub('D_.__', '', pmelted$Rank5)   #D_4
  pmelted$Genus <- gsub('D_.__', '', pmelted$Rank6)  #D_5
  
  if("Rank7" %in% names(pmelted)){
    pmelted$Species <- gsub('D_.__', '', pmelted$Rank7)  #D_6
  }

  return(pmelted)
}

aggregate_tax_by_day <- function(pmelted){
  # Calculate median of the Abundance for each taxa at each day
  paggregated <- aggregate(Abundance ~ OTU + Rank1 + Rank2 + Rank3 + Day + Taxonomy,
                       data = pmelted,
                       FUN = function(x) median(x))
  return(paggregated)
}

aggregate_tax_by_day_n <- function(pmelted){
  # Same as above, but also casts Day as numeric
  paggregated <- aggregate(Abundance ~ OTU + Rank1 + Rank2 + Rank3 + Day + Taxonomy,
                       data = pmelted,
                       FUN = function(x) median(x))

  paggregated$Day <- as.numeric(levels(paggregated$Day))[paggregated$Day]

  return(paggregated)
}
# Glom OTUs by phyla and transform to RA
glom6.16s <- tax_glom(rar.16s.cohort, taxrank = "Rank6")
glom6.16s <- transform_sample_counts(glom6.16s, function(x) x / sum(x))

glom6.18s <- tax_glom(rar.18s.cohort, taxrank = "Rank6")
glom6.18s <- transform_sample_counts(glom6.18s, function(x) x / sum(x))
glom7.18s <- tax_glom(rar.18s.cohort, taxrank = "Rank7")
glom7.18s <- transform_sample_counts(glom7.18s, function(x) x / sum(x))

Set threshold for inclusion

selection_threshold <- 0.05
# for a comparison to 0.05, see the file `Compare .02 vs .05.PNG`

16S genera rank abundance curve, for days 8 and 79

# Select top genera from day 8 and 79
glom6.16s.days <- subset_samples(glom6.16s, Day %in% c("8", "79"))
glom6.16s.days.top <- prune_taxa(names(sort(taxa_sums(glom6.16s.days), TRUE))[0:20], glom6.16s.days)

# Melt to dataframe
glom6.16s.days.top.melt <- clean_silva_tax(psmelt(glom6.16s.days.top))

# Sort OTUs by their medians in day 8
glom6.16s.days.top.melt.8 <- subset(glom6.16s.days.top.melt, Day == "8")
reordered_levels <- levels(reorder(glom6.16s.days.top.melt.8$OTU, -(glom6.16s.days.top.melt.8$Abundance), median))[]
glom6.16s.days.top.melt$OTU <- factor(glom6.16s.days.top.melt$OTU, levels = reordered_levels)

levels(glom6.16s.days.top.melt$Day) <- c("Day 8", "Day 79")

# Graph, splitting by day
glom6.16s.days.top.gg <- ggplot(glom6.16s.days.top.melt, aes(x=OTU, y = Abundance))
scree.16s <- glom6.16s.days.top.gg +
  geom_hline(yintercept = selection_threshold, size = 1) +
  geom_boxplot(aes(color = Phylum)) +
  v +
  labs(title = "Most Common Bacteria", y = "Abundance") +
  facet_grid(~Day) +
  #scale_y_continuous(limits = c(-.02, .95)) +
  theme(axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        legend.position = "bottom",
        strip.background = element_blank()) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
scree.16s

# Get the medians shown in the graph
glom6.16s.days.top.median <- aggregate_tax_by_day(glom6.16s.days.top.melt)

# Select the genera with medians above the cutoff.
glom6.16s.days.top.median.top <- glom6.16s.days.top.median %>%
  filter(Abundance > selection_threshold) %>% arrange(Day, -Abundance)
glom6.16s.days.top.median.top
##      OTU         Rank1               Rank2                    Rank3    Day
## 1  OTU_5 D_0__Bacteria  D_1__Cyanobacteria    D_2__Oxyphotobacteria  Day 8
## 2 OTU_16 D_0__Bacteria D_1__Proteobacteria D_2__Alphaproteobacteria  Day 8
## 3 OTU_17 D_0__Bacteria  D_1__Cyanobacteria    D_2__Oxyphotobacteria  Day 8
## 4  OTU_9 D_0__Bacteria  D_1__Bacteroidetes        D_2__Rhodothermia Day 79
## 5  OTU_6 D_0__Bacteria D_1__Proteobacteria D_2__Gammaproteobacteria Day 79
## 6 OTU_18 D_0__Bacteria D_1__Proteobacteria D_2__Deltaproteobacteria Day 79
## 7  OTU_5 D_0__Bacteria  D_1__Cyanobacteria    D_2__Oxyphotobacteria Day 79
##                                                     Taxonomy  Abundance
## 1               Phormidesmiales, Nodosilineaceae, uncultured 0.47318313
## 2         Sphingomonadales, Sphingomonadaceae, Erythrobacter 0.09406673
## 3        Nostocales, Geitlerinemaceae, Geitlerinema PCC-7105 0.05263741
## 4                 Balneolales, Balneolaceae, Rhodohalobacter 0.29750263
## 5 Oceanospirillales, Saccharospirillaceae, Saccharospirillum 0.24477015
## 6      Bradymonadales, Bradymonadaceae, uncultured bacterium 0.11512963
## 7               Phormidesmiales, Nodosilineaceae, uncultured 0.11346903
# Select these OTUs from all days, for use in the line graphs
glom6.16s.all.select_taxa <- prune_taxa(as.character(glom6.16s.days.top.median.top$OTU), glom6.16s)
glom6.16s.all.select_taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6 taxa and 71 samples ]
## sample_data() Sample Data:       [ 71 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 6 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6 tips and 5 internal nodes ]

18S rank abundance curve, for days 8 and 79

# Select top genera from day 8 and 79
glom.18s.days <- subset_samples(glom6.18s, Day %in% c("8", "79"))
glom.18s.days.top <- prune_taxa(names(sort(taxa_sums(glom.18s.days), TRUE))[0:20], glom.18s.days)

# Melt to dataframe
glom.18s.days.top.melt <- clean_silva_tax(psmelt(glom.18s.days.top))

# Sort OTUs by their medians in day 8
glom.18s.days.top.melt.8 <- subset(glom.18s.days.top.melt, Day == "8")
reordered_levels.18s <- levels(reorder(glom.18s.days.top.melt.8$OTU, -(glom.18s.days.top.melt.8$Abundance), median))[]
glom.18s.days.top.melt$OTU <- factor(glom.18s.days.top.melt$OTU, levels = reordered_levels.18s)

levels(glom.18s.days.top.melt$Day) <- c("Day 8", "Day 79")


# Graph, splitting by day
glom.18s.days.top.gg <- ggplot(glom.18s.days.top.melt, aes(x=OTU, y = Abundance))
scree.18s <- glom.18s.days.top.gg +
  geom_hline(yintercept = selection_threshold, size = 1) +
  geom_boxplot(aes(color = Phylum)) +
  pl +
  labs(title = "Most Common Eukaryotes", y = "Abundance") +
  facet_grid(~Day) +
  scale_y_continuous(breaks = c(.2, .4, .6, .8)) +
  theme(axis.text.x=element_blank(),
        axis.title.x = element_blank(),
        legend.position = "bottom",
        strip.background = element_blank()) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
scree.18s

# Get the medians shown in the graph
glom.18s.days.top.median <- aggregate_tax_by_day(glom.18s.days.top.melt)

# Select the genera with medians above the cutoff.
glom.18s.days.top.median.top <- glom.18s.days.top.median %>%
  filter(Abundance > selection_threshold) %>% arrange(Day, -Abundance)
glom.18s.days.top.median.top %>% arrange(Day, -Abundance)
##      OTU          Rank1             Rank2              Rank3    Day
## 1 OTU_13 D_0__Eukaryota D_1__Opisthokonta       D_2__Holozoa  Day 8
## 2  OTU_9 D_0__Eukaryota          D_1__SAR D_2__Stramenopiles  Day 8
## 3 OTU_30 D_0__Eukaryota          D_1__SAR D_2__Stramenopiles  Day 8
## 4 OTU_12 D_0__Eukaryota D_1__Opisthokonta       D_2__Holozoa  Day 8
## 5  OTU_3 D_0__Eukaryota          D_1__SAR D_2__Stramenopiles Day 79
##                                    Taxonomy  Abundance
## 1  Metazoa (Animalia), Podocopa, Podocopida 0.21192083
## 2 Ochrophyta, Bacillariophyceae, Sellaphora 0.21091034
## 3 Ochrophyta, Bacillariophyceae, Pinnularia 0.12519280
## 4   Metazoa (Animalia), Copepoda, Calanoida 0.06991901
## 5  Ochrophyta, Bacillariophyceae, Nitzschia 0.94635118
# Select these OTUs from all days, for use in the line graphs
glom.18s.all.select_taxa <- prune_taxa(as.character(glom.18s.days.top.median.top$OTU), glom6.18s)
glom.18s.all.select_taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5 taxa and 68 samples ]
## sample_data() Sample Data:       [ 68 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 5 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 5 tips and 4 internal nodes ]
plot_grid(scree.16s, scree.18s, labels = c("A", "B"), ncol = 2, align = "h")

Community coverage of the select_taxa

Let’s evaluate what proportion of the overall community is captured by the most common genera we are showing here.

sum(sample_sums(glom6.16s.all.select_taxa))/sum(sample_sums(glom6.16s))
## [1] 0.5475895
sum(sample_sums(glom.18s.all.select_taxa))/sum(sample_sums(glom.18s))
## [1] 11.12384

16S time series line plot

Note that we intentionally replace these taxonomies with more useful annotations.

# Graph this: glom6.16s.all.select_taxa
select.16s.melt <- clean_silva_tax(psmelt(glom6.16s.all.select_taxa))
#head(select.16s.melt)

# Aggregate for graphing
select.16s.median <- aggregate_tax_by_day_n(select.16s.melt)

# Add category for the founders species
select.16s.median.founders <- select.16s.median[select.16s.median$Abundance > selection_threshold & select.16s.median$Day == "8", ]

select.16s.median$`Founders Species` <- select.16s.median$OTU %in% select.16s.median.founders$`OTU`
select.16s.median$`Founders Species` <- factor(select.16s.median$`Founders Species`,
                                               levels = c("TRUE", "FALSE"),
                                               labels = c("Founders Species", "Other Common Genera"))

# replace taxa names
#select.16s.median$Taxonomy[select.16s.median$Taxonomy == "GMD14H09, "] <- "Deltaproteobacteria, GMD14H09"

select.16s.gg <- ggplot(select.16s.median, aes(x = Day, y = Abundance))
#select.16s.gg + geom_bar(aes(fill = Taxonomy), color = 'black', stat = 'identity')
selected.16s.line <- select.16s.gg +
  geom_line(aes(color = Taxonomy, linetype=`Founders Species`)) +
  geom_point(aes(color = Taxonomy)) + v +
  scale_linetype(name="", guide = T) +
  scale_x_continuous("Sampling Day", c(8,14,35,56,79), NULL) +
  theme(legend.spacing = unit(-0.5, "cm"), legend.background = element_blank()) + 
  #   #This next line is a kludge. I manually dash the legend lines to show which ones are FS.
  #guides(color=guide_legend(override.aes=list(linetype=c(2,2,2,1,1,1,2,1), shape = c(NA))))
  guides(color=guide_legend(override.aes=list(linetype=c(2,2,1,2,1,1), shape = c(NA)), order = 1)
                   ,linetype = guide_legend(override.aes=list(linetype=c(1,2)), order = 2)
)
# 2 is dashed, 1 is solid
selected.16s.line

# Table of species
select.16s.median %>%
  dplyr::select(Rank1, Rank2, Rank3, Taxonomy, `Founders Species`) %>%
  unique %>% kable(row.names = F)
Rank1 Rank2 Rank3 Taxonomy Founders Species
D_0__Bacteria D_1__Bacteroidetes D_2__Rhodothermia Balneolales, Balneolaceae, Rhodohalobacter Other Common Genera
D_0__Bacteria D_1__Proteobacteria D_2__Deltaproteobacteria Bradymonadales, Bradymonadaceae, uncultured bacterium Other Common Genera
D_0__Bacteria D_1__Cyanobacteria D_2__Oxyphotobacteria Nostocales, Geitlerinemaceae, Geitlerinema PCC-7105 Founders Species
D_0__Bacteria D_1__Proteobacteria D_2__Gammaproteobacteria Oceanospirillales, Saccharospirillaceae, Saccharospirillum Other Common Genera
D_0__Bacteria D_1__Cyanobacteria D_2__Oxyphotobacteria Phormidesmiales, Nodosilineaceae, uncultured Founders Species
D_0__Bacteria D_1__Proteobacteria D_2__Alphaproteobacteria Sphingomonadales, Sphingomonadaceae, Erythrobacter Founders Species

18S time series line plot

# Graph this: glom6.16s.all.select_taxa
select.18s.melt <- clean_silva_tax(psmelt(glom.18s.all.select_taxa))
#head(select.18s.melt)

# Aggregate for graphing
select.18s.median <- aggregate_tax_by_day_n(select.18s.melt)

# Add category for the founders species
select.18s.median.founders <- select.18s.median[select.18s.median$Abundance > selection_threshold & select.18s.median$Day == "8", ]

select.18s.median$`Founders Species` <- select.18s.median$OTU %in% select.18s.median.founders$`OTU`
select.18s.median$`Founders Species` <- factor(select.18s.median$`Founders Species`,
                                               levels = c("TRUE", "FALSE"),
                                               labels = c("Founders Species", "Other Common Genera"))

select.18s.gg <- ggplot(select.18s.median, aes(x = Day, y = Abundance))
#select.18s.gg + geom_bar(aes(fill = Taxonomy), color = 'black', stat = 'identity')
select.18s.line <- select.18s.gg +
  geom_line(aes(color = Taxonomy, linetype=`Founders Species`)) +
  geom_point(aes(color = Taxonomy)) + pl +
  scale_linetype(name= element_blank(), guide = F) +
  scale_x_continuous("Sampling Day", c(8,14,35,56,79), NULL) +
  theme(legend.spacing = unit(-0.5, "cm"), legend.background = element_blank()) +
  #   #This next line is a kludge. I manually dash the legend lines to show which ones are FS.
  # guides(color     = guide_legend(override.aes=list(linetype=c(1,1,1,1,2,2,1), shape = c(NA)), order = 1)
   guides(color     = guide_legend(override.aes=list(linetype=c(1,1, 2, 1,1), shape = c(NA)), order = 1)
         )

select.18s.line

# Table of species
select.18s.median %>%
  dplyr::select(Rank1, Rank2, Rank3, Taxonomy, `Founders Species`) %>%
  unique %>% kable(row.names = F)
Rank1 Rank2 Rank3 Taxonomy Founders Species
D_0__Eukaryota D_1__Opisthokonta D_2__Holozoa Metazoa (Animalia), Copepoda, Calanoida Founders Species
D_0__Eukaryota D_1__Opisthokonta D_2__Holozoa Metazoa (Animalia), Podocopa, Podocopida Founders Species
D_0__Eukaryota D_1__SAR D_2__Stramenopiles Ochrophyta, Bacillariophyceae, Nitzschia Other Common Genera
D_0__Eukaryota D_1__SAR D_2__Stramenopiles Ochrophyta, Bacillariophyceae, Pinnularia Founders Species
D_0__Eukaryota D_1__SAR D_2__Stramenopiles Ochrophyta, Bacillariophyceae, Sellaphora Founders Species

Combined time series graph

scree.line.both.alt <- plot_grid(rel_heights = c(1, 1.1),
                                 labels = c("A", "B", "C", "D"),
          scree.16s + theme(legend.position = "right") + guides(color=guide_legend(byrow=F)),
          scree.18s + theme(legend.position = "right") + guides(color=guide_legend(byrow=F)),
          selected.16s.line,
          select.18s.line)
#scree.line.both.alt


ggsave("./figures/scree_and_line_silva.pdf", scree.line.both.alt, scale = 1.6, width = 183, height = 82, units = "mm")

Inspect specific microbes to correct or contextualize their taxonomy

Note: This section reflects limitations of our initial filtering and taxonomy methods. Now that we use a unified tabase and filter out chloroplasts and mitochondria, many of these issues are mitigated.

glom6.16s.all.select_taxa %>% tax_table()
# Look at OTU_18 (o__GMD14H09) and OTU_574 (o__Stramenopiles)

# ¿Qué es un o__GMD14H09?
inspect.o__GMD14H09 <- full16s %>%
  merge_samples(group = "SampleType") %>%
  psmelt() %>% filter(Rank4 == "o__GMD14H09")
inspect.o__GMD14H09 %>% head
# This family is mostly OTU_18
"
>OTU_18
TACGGAGGGTGCAAGCGTTGTTCGGAATCATTGGGCGTAAAGGGCGCGCAGGCGGTCTTTCAAGTCCGGCGTGAAATCCC
AGGGCTCAACCCTGGAACTGCGTTGGAAACTGGACGACTTGAGTATGGGAGAGGTTCGTGGAATTCCAGGTGTAGGGGTG
AAATCCGTAGATATCTGGAGGAACACCGGCGGCGAAAGCGACGAACTGGACCAACACTGACGCTGAGGCGCGAAAGCGTG
GGGAGCAAACA
"
# Silva: 88.9% Bacteria;Proteobacteria;Deltaproteobacteria;Bradymonadales;
# (Silva also has one hit to Desulfuromonadales)
# NCBI Megablast: Many hits at 86% that match to
# Bacteria; Proteobacteria; Deltaproteobacteria; Desulfuromonadales



# Stranger Stramenopiles:

# NOTE: When we removed c__Chloroplasts, we got rid of o__Stramenopiles.
# But we can still get them from full16s
inspect.o__Stramenopiles <- full16s %>%
  merge_samples(group = "SampleType") %>%
  psmelt() %>% filter(Rank4 == "o__Stramenopiles")
inspect.o__Stramenopiles %>% head
# Many OTUs, including OTU_1 are inside this order. I'm wondering if these are
# just different chloroplasts...
"
>OTU_574
GACGGAGGATGCAAGTGTTATCCGGAATCACTGGGCGTAAAGCGTCTGTAGGTGGTTTAATAAGTCAACTGTTAAATCTT
GAAGCTCAACTTCAAAATCGCAGTCGAAACTATTAGACTAGAGTATAGTAGGGGTAAAGGGAATTTCCAGTGGAGCGGTG
AAATGCGTAGAGATTGGAAAGAACACCGATGGCGAAGGCACTTTACTGGGCTATTACTGACACTCAGAGACGAAAGTGTG
GGGAGCAAACA
>OTU_148
GACGGAGGATGCAAGTGTTATCCGGAATCACTGGGCGTAAAGCGTCTGTAGGTGGTTTAATAAGTCAACTGTTAAATCTT
GAGGCTCAACCTCAAAATCGCAGTCGAAACTATTAGACTAGAGTATAGTAGGGGTAAAGGGAATTTCCAGTGGAGCGGTG
AAATGCGTAGATATTGGAAGGAACACCGATGGCGAAGGCACTTTACTGGGCTATTTATTACTAACACTCAGAGACGAAAG
CTAGGGTAGCA
>OTU_1
GACGGAGGATGCAAGTGTTATCCGGAATCACTGGGCGTAAAGCGTCTGTAGGTGGTTTAGTAAGTCAACTGTTAAATCTT
GAAGCTCAACTTCAAAACCGCAGTCGAAACTATTAGACTAGAGTATAGTAGGGGTAAAGGGAATTTCCAGTGGAGCGGTG
AAATGCGTAGAGATTGGAAAGAACACCGATGGCGAAGGCACTTTACTGGGCTATTACTGACACTCAGAGACGAAAGCTAG
GGTAGCAAATG
"
# Silva: All hits 97% - 99% Bacteria;Cyanobacteria;Chloroplast;
# ... Yep. Totally Chloroplasts.


# Lake Tomatoes:

# More searches with SILVA will help us understand if
# "Charophyta, Solanales, Solanum" is really a Lake Tomato
# https://en.wikipedia.org/wiki/Solanum

# You say potato, I say over-identification. Let's find that taxa in our original, full data set.
full18s %>%
  subset_taxa(Rank4 == "D_3__Charophyta") %>%
  plot_bar(fill = "Rank7", title = "Lake Tomatoes of Unusual Size")
# Our 'tomatoes' are  flourishing. But what is their amplicon sequence?
full18s %>% merge_samples(group = "SampleType") %>%
  subset_taxa(Rank7 == "D_12__Solanum lycopersicum (tomato)") %>% psmelt %>% head

# grep for OTU_16, 45 and 46
"
>OTU_16
AAGTCATGGAAGCTGGCAGCGCCCGAAGTCGCCTCGAATCAGGGGTGCCCACGGTGAGGCTGGTGACTGGGACTAAGTCG
TAACAAGGTAGCC
>OTU_45
ACACCATGGGAGTTGGCTTTACCCGAAGCCGGTGCGCTAACCGCAAGGAGGCAGCCGTCCACGGTAAGGTCAGCGACTGG
GGTGAAGTCGTAACAAGGTAGCC
>OTU_46
ACACCATGGGAGTTGGCTCTACCCGAAGACGCTGTGCTAACCGCAAGGGGGCAGGCGGCCACGGTAGGGTCAGCGACTGG
GGTGAAGTCGTAACAAGGTAGCC
"
# 45 and 46 are similar, while 16 is much shorter from a deletion in the first half.
# My taxonomy annotation was based on a search of SILVA 18S, and returned hits
# around 70%. A quick search of the 16S gene in SILVA returns hits above 94%
# and a LCA tax of:
# OTU_16 Bacteria;Planctomycetes;Phycisphaerae;Phycisphaerales;Phycisphaeraceae;
# OTU_45 Bacteria;Proteobacteria;Alphaproteobacteria;
# OTU_46 Bacteria;Proteobacteria;Alphaproteobacteria;Alphaproteobacteria Incertae Sedis;Unknown Family;

# Given that we see some Planctomycetes lots Proteobacteria of, these are
# probably off-target effects of the 18S primers, and not lake tomatoes after all.

Modeling Ecological Diversity

Here we make use of the methods implemented in the betapart and picante package and described in this paper by James Stegen.

https://cran.r-project.org/web/packages/betapart/betapart.pdf https://github.com/skembel/picante

The these packages don’t like the phyloseq:: data structures. So let’s export phyloseq objects in a picante friendly way.

# This function does not export the taxonomy table
# and does not include a trait table.
setClass("ps.pic", representation(phy = "phylo", otu = "matrix"))

phyloseq_to_picante <- function(psobject){
  if(class(psobject) != "phyloseq") stop("Input must be a phyloseq object")
  return(new("ps.pic", phy = psobject@phy_tree,
        otu = if(psobject@otu_table@taxa_are_rows) t(psobject@otu_table) else psobject@otu_table
    )
  )
}

# This must be the worst S4 class ever constructed.


# phyloseq_to_picante(glom.18s)@phy
# phyloseq_to_picante(glom.18s)@otu %>% head
# phyloseq_to_picante(glom.18s)@otu %>% class

Nestedness vs Turnover

Any differences between two samples can explained by either the loss of a shared species or the introduction of a unique species. (Beta diversity can be partitions into Nestedness and Turnover.)

See betapart: an R package for the study of beta diversity and the vignette (PDF).

# Export to picante

# use full cohort...
# rar.16s.cohort.ex <- rar.16s.cohort %>% phyloseq_to_picante
# or remove otus that never appear...
# rar.16s.cohort.ex <- rar.16s.cohort %>% filter_taxa(flist = function(x) max(x) > 0, prune = T) %>% phyloseq_to_picante # 2373 taxa
# or remove singletons.
rar.16s.cohort.ex <- rar.16s.cohort %>% filter_taxa(flist = function(x) sum(x) > 1, prune = T) %>% phyloseq_to_picante
rar.18s.cohort.ex <- rar.18s.cohort %>% filter_taxa(flist = function(x) sum(x) > 1, prune = T) %>% phyloseq_to_picante

# Convert to binary
rar.16s.cohort.ex.b <- rar.16s.cohort.ex
rar.16s.cohort.ex.b@otu[rar.16s.cohort.ex.b@otu > 0] = 1
rar.18s.cohort.ex.b <- rar.18s.cohort.ex
rar.18s.cohort.ex.b@otu[rar.18s.cohort.ex.b@otu > 0] = 1

# merge both data sets
# copy OTU table
rar.both.ex <- rar.18s.cohort.ex.b@otu
# rename OTUs
taxa_names(rar.both.ex) <- rar.both.ex %>% taxa_names() %>% paste("18s", sep = "_")
# merge, and save
rar.both.ex <- merge_phyloseq_pair(rar.both.ex, rar.16s.cohort.ex.b@otu)


rar.16s.cohort.ex.j <- beta.pair(rar.16s.cohort.ex.b@otu, index.family = "jaccard")
rar.18s.cohort.ex.j <- beta.pair(rar.18s.cohort.ex.b@otu, index.family = "jaccard")
rar.both.ex.j <- beta.pair(rar.both.ex, index.family = "jaccard")

# $ beta.jtu is turnover, beta.jne is nestedness, beta.jac is combined Jaccard
#rar.16s.cohort.ex.j$beta.jtu


distmelt <- function(d){
  d.melt <- d %>% as.matrix %>% as.data.frame %>% tibble::rownames_to_column() %>% melt()
  d.melt$variable <- as.character(d.melt$variable)
  return(d.melt[d.melt$variable > d.melt$rowname, ])
}

jaccard.melt <- rar.16s.cohort.ex.j$beta.jac %>% distmelt()
## Using rowname as id variables
turnover.melt <- rar.16s.cohort.ex.j$beta.jtu %>% distmelt()
## Using rowname as id variables
nestedness.melt <- rar.16s.cohort.ex.j$beta.jne %>% distmelt()
## Using rowname as id variables
jaccard.melt.18s <- rar.18s.cohort.ex.j$beta.jac %>% distmelt()
## Using rowname as id variables
turnover.melt.18s <- rar.18s.cohort.ex.j$beta.jtu %>% distmelt()
## Using rowname as id variables
nestedness.melt.18s <- rar.18s.cohort.ex.j$beta.jne %>% distmelt()
## Using rowname as id variables
jaccard.melt.both <- rar.both.ex.j$beta.jac %>% distmelt()
## Using rowname as id variables
turnover.melt.both <- rar.both.ex.j$beta.jtu %>% distmelt()
## Using rowname as id variables
nestedness.melt.both <- rar.both.ex.j$beta.jne %>% distmelt()
## Using rowname as id variables
# 16S
# jaccard.melt %>% ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster()
# turnover.melt %>% ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster()
# nestedness.melt %>% ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster()
# 18S
# jaccard.melt.18s %>% ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster()
# turnover.melt.18s %>% ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster()
# nestedness.melt.18s %>% ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster()

# Merge for combined graph
jaccard.melt$Type <- "Total"
turnover.melt$Type <- "Turnover"
nestedness.melt$Type <- "Nestedness"
all.melt <- rbind(jaccard.melt, turnover.melt, nestedness.melt)

jaccard.melt.18s$Type <- "Total"
turnover.melt.18s$Type <- "Turnover"
nestedness.melt.18s$Type <- "Nestedness"
all.melt.18s <- rbind(jaccard.melt.18s, turnover.melt.18s, nestedness.melt.18s)

jaccard.melt.both$Type <- "Total"
turnover.melt.both$Type <- "Turnover"
nestedness.melt.both$Type <- "Nestedness"
all.melt.both <- rbind(jaccard.melt.both, turnover.melt.both, nestedness.melt.both)
# Add columns listing the categories that the samples are in. Match them using grep.
fix_rows <- function(df){

  try(
    # try() to add nice labels, but don't mentioned it if there is no $Type
    df$Type <- factor(df$Type, levels = c("Total", "Nestedness", "Turnover")), silent = T
  )

  df$row_cat <- 1
  df$row_cat[grepl("T2", df$rowname, fixed = T)] <- 8
  df$row_cat[grepl("T3", df$rowname, fixed = T)] <- 14
  df$row_cat[grepl("T4", df$rowname, fixed = T)] <- 35
  df$row_cat[grepl("T5", df$rowname, fixed = T)] <- 56
  df$row_cat[grepl("T6", df$rowname, fixed = T)] <- 79

  df$var_cat <- 1
  df$var_cat[grepl("T2", df$variable, fixed = T)] <- 8
  df$var_cat[grepl("T3", df$variable, fixed = T)] <- 14
  df$var_cat[grepl("T4", df$variable, fixed = T)] <- 35
  df$var_cat[grepl("T5", df$variable, fixed = T)] <- 56
  df$var_cat[grepl("T6", df$variable, fixed = T)] <- 79

  df$var_cat <- factor(df$var_cat, levels = c(79,56,35,14,8))

  return(df)
}

all.melt <- all.melt %>% fix_rows
all.melt.18s <- all.melt.18s %>% fix_rows
all.melt.both <- all.melt.both %>% fix_rows

# Reverse this category so that it makes a nice triangle on the graph.
all.melt$var_cat <- all.melt$var_cat %>% factor(levels = c(79,56,35,14,8))
all.melt.18s$var_cat <- all.melt.18s$var_cat %>% factor(levels = c(79,56,35,14,8))
all.melt.both$var_cat <- all.melt.both$var_cat %>% factor(levels = c(79,56,35,14,8))


# Better combined graphs
plot_dm <- function(df, ylab = "Sampling Day"){
  return(
    ggplot(df, aes(y = variable, x = rowname, fill = value)) + geom_raster() +
      theme(strip.background = element_blank(),
            axis.text = element_blank(),
            axis.ticks = element_blank(),
            axis.title.x = element_blank(),
            panel.grid = element_blank(),
            panel.border = element_blank(),
            legend.position = c(.95,.4)) +
      labs(y = ylab, fill = "Binary \n Jaccard \nDistance") +
      facet_grid(var_cat ~ Type + row_cat, scales = "free", switch = "y")
  )
}

all.melt %>%
  plot_dm(ylab = "Bacteria Sampling Day") +
  scale_fill_viridis(limits=c(0,1))

all.melt.18s %>%
  plot_dm(ylab = "Eukaryotes Sampling Day") +
  scale_fill_viridis(limits=c(0,1), option = "A") # I'm using magma, as plasma was to bright.

all.melt.both %>%
  plot_dm + scale_fill_continuous(low = "#222222", high = "#EEEEEE") # use grays for combined plot

While these are not perfect visualizations, they are good reminders that we could compare the average between groups using ANOVA + Tukey test.

Jaccard notes

  • These are distance metrics:
  • large means more different microbes
  • This partitioning works like a sum.
  • Total Jaccard == Turnover + Nestedness
  • all.melt %>% filter(rowname == "T4.14", variable == "T4.19")

Questions:

Let’s use the vegan::adonis test to see how much variation of nestedness and turnover can be attributed to sampling day. Note that while Jaccard distances of nestedness add up to total Jaccard distance, these R2 values will not sum up. Rather, this qualitative measurement shows of if sampling day better correlates with nestedness or turnover.

df = as(sample_data(rar.16s.cohort), "data.frame")
day.16s <- rbind(
adonis(rar.16s.cohort.ex.j$beta.jac ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Bacteria", Type = "Total"), # Total .33
adonis(rar.16s.cohort.ex.j$beta.jtu ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Bacteria", Type = "Turnover"), # Turnover .19
adonis(rar.16s.cohort.ex.j$beta.jne ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Bacteria", Type = "Nestedness")# Nestedness .51
)

df = as(sample_data(rar.18s.cohort), "data.frame")
day.18s <- rbind(
adonis(rar.18s.cohort.ex.j$beta.jac ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Eukaryotes", Type = "Total"), # Total .23
adonis(rar.18s.cohort.ex.j$beta.jtu ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Eukaryotes", Type = "Turnover"), # Turnover .25
adonis(rar.18s.cohort.ex.j$beta.jne ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Eukaryotes", Type = "Nestedness") # Nestedness .04
)

# We have to merge the sample_data from 16s and 18s to cover 'both'
df <- df %>% rbind(as(sample_data(rar.16s.cohort), "data.frame"), as(sample_data(rar.18s.cohort), "data.frame")) %>% unique
day.both <- rbind(
adonis(rar.both.ex.j$beta.jac ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Combined", Type = "Total"), # Total .23
adonis(rar.both.ex.j$beta.jtu ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Combined", Type = "Turnover"), # Turnover .25
adonis(rar.both.ex.j$beta.jne ~ Day, df)$aov.tab %>% data.frame %>% filter(Df == "4") %>% data.frame(Microbe = "Combined", Type = "Nestedness") # Nestedness .04
)

rbind(day.16s, day.18s, day.both) %>% kable()
Df SumsOfSqs MeanSqs F.Model R2 Pr..F. Microbe Type
4 5.7921464 1.4480366 8.451756 0.3387239 0.001 Bacteria Total
4 1.8001632 0.4500408 4.029431 0.1962758 0.001 Bacteria Turnover
4 0.8712268 0.2178067 19.328900 0.5394779 0.001 Bacteria Nestedness
4 4.2542870 1.0635717 5.220543 0.2489465 0.001 Eukaryotes Total
4 3.8744972 0.9686243 6.344729 0.2871603 0.001 Eukaryotes Turnover
4 -0.0521791 -0.0130448 -1.936532 -0.1401916 1.000 Eukaryotes Nestedness
4 5.0634678 1.2658670 4.982642 0.2014601 0.001 Combined Total
4 3.0673439 0.7668360 4.490660 0.1852532 0.001 Combined Turnover
4 0.1930365 0.0482591 2.987477 0.1313900 0.023 Combined Nestedness

More Nestedness and Turnover

How do we measure the priority effects of the Founder Species?

  • If the founder species exert a priority effect, community structure will be defined by the founders. The final community will be a nested subset of the previously observe microbes.
  • If the founder species cannot / do not exert a priority effect, community structure will be defined by introduced microbes. Turnover will have forced the founders out and the final community will be distinct from previously observed microbes.

Nestedness indicates a priority effect.

We already measured this with betapart, but we want a metric which is super simple.

In this section, we will count how many microbes appear at a timepoint that were not in any previous timepoint. We will divide that by the total number of microbes seen so far, to get a metric showing ‘percent new microbes’ which is equal to turnover / dispersion.

“Counting is the hardest part of Bioinformatics” - Lee Ann McCue

Related concepts: - alpha rarefaction asks ‘by what read depth are no new microbes introduced’ while we are asking ‘by what sampling time are no new microbes introduced’. - This is the finite difference of cumulative alpha diversity over time - This is the first derivative of gamma diversity over time

# First, merge by day
rar.16s.cohort.day <- merge_samples(rar.16s.cohort, group = "Day")

total.taxa <- data.frame(Day = factor(c("Day 8", "Day 14", "Day 35", "Day 56", "Day 79"),
                                      levels = c("Day 8", "Day 14", "Day 35", "Day 56", "Day 79")))

total.taxa$`Total OTUs` <- c(
subset_samples(rar.16s.cohort.day, Day %in% 1) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.16s.cohort.day, Day %in% 1:2) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.16s.cohort.day, Day %in% 1:3) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.16s.cohort.day, Day %in% 1:4) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.16s.cohort.day, Day %in% 1:5) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa()
)

# sanity check
rar.16s.cohort.day %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa() # Good. We got all 5 days.
## [1] 2276
subset_samples(rar.16s.cohort.day, Day %in% 1) %>% estimate_richness() # We are reporting all Observed OTUs.
##    Observed    Chao1 se.chao1     ACE   se.ACE  Shannon   Simpson
## X8      999 1418.292  60.4835 1438.23 20.36962 3.899723 0.9230088
##    InvSimpson   Fisher
## X8    12.9885 175.4096
rar.16s.cohort.day %>% ntaxa # and the difference between our count and the total...
## [1] 2316
rar.16s.cohort.day %>% taxa_sums() %>% table %>% head # ... is equal to the number of 0s in our table
## .
##   0   1   2   3   4   5 
##  40 412 298 208 114  88
total.taxa$`New OTUs` <- total.taxa$`Total OTUs` - lag(total.taxa$`Total OTUs`)
#total.taxa

# 18s
rar.18s.cohort.day <- merge_samples(rar.18s.cohort, group = "Day")

total.euks <- data.frame(Day = factor(c("Day 8", "Day 14", "Day 35", "Day 56", "Day 79"),
                                      levels = c("Day 8", "Day 14", "Day 35", "Day 56", "Day 79")))

total.euks$`Total OTUs` <- c(
subset_samples(rar.18s.cohort.day, Day %in% 1) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.18s.cohort.day, Day %in% 1:2) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.18s.cohort.day, Day %in% 1:3) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.18s.cohort.day, Day %in% 1:4) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa(),
subset_samples(rar.18s.cohort.day, Day %in% 1:5) %>% filter_taxa(function(x) max(x) > 0, TRUE) %>% ntaxa()
)

total.euks$`New OTUs` <- total.euks$`Total OTUs` - lag(total.euks$`Total OTUs`)
total.euks
##      Day Total OTUs New OTUs
## 1  Day 8        291       NA
## 2 Day 14        541      250
## 3 Day 35        649      108
## 4 Day 56        665       16
## 5 Day 79        677       12
# Combined
total.combined <- data.frame(Day = factor(c("Day 8", "Day 14", "Day 35", "Day 56", "Day 79"),
                                      levels = c("Day 8", "Day 14", "Day 35", "Day 56", "Day 79")))

total.combined$`Total OTUs` <- total.taxa$`Total OTUs` + total.euks$`Total OTUs`
total.combined$`New OTUs` <- total.taxa$`New OTUs` + total.euks$`New OTUs`

total.combined$Type <- "Combined"
total.taxa$Type <- "Bacteria"
total.euks$Type <- "Eukaryotes"

total.all <- rbind(total.combined, total.taxa, total.euks)

# Replace NA (all OTUs are new at the first timepoint)
total.all$`New OTUs`[is.na(total.all$`New OTUs`)] <- total.all$`Total OTUs`[is.na(total.all$`New OTUs`)]

# calculate observed turnover
total.all$`Percent New OTUs` <- total.all$`New OTUs` / total.all$`Total OTUs`
# reorder levels
total.all$Type <- total.all$Type %>% factor(levels = c("Bacteria", "Eukaryotes", "Combined"))

# rename variables
total.all$`Sum of all observed OTUs` <- total.all$`Total OTUs`
total.all$`Sum of previously unobserved OTUs` <- total.all$`New OTUs`
total.all$`Fraction of community composed of\npreviously unobserved OTUs ` <- total.all$`Percent New OTUs`

# Optional: remove old variables
total.all$`Total OTUs` <- NULL
total.all$`New OTUs` <- NULL
total.all$`Percent New OTUs` <- NULL

total.all.m <- melt(total.all)
## Using Day, Type as id variables
total.all.m$variable <- factor(total.all.m$variable, levels = levels(total.all.m$variable)[c(3,2,1)])

total.all.m %>%
  ggplot(aes(x = Day, y = value, color = Type)) +
  geom_point(alpha = .9) +
  geom_line(aes(group = Type), size = 1, alpha = .5) +
  facet_wrap(~variable, scales = "free_y") +
  #scale_color_manual(values = c("#5DC863", "#B52F8C", "#777777")) +
  scale_color_manual(values = c("#6DCD59", "#9F2F7FFF", "#777777")) +
  theme(axis.title = element_blank(),
#        legend.position = c(.92,.6), # legend in the left panel
        legend.position = c(.22,.62), # legend in the right panel
        legend.title = element_blank()
        #, panel.spacing.x = unit(30, "pt") # add spacing between plots for equations
        )

# viridis(10, option = "D")[8]
# viridis(10, option = "A")[5]

total.all.m %>% filter(Type == "Combined", Day == "Day 56")
##      Day     Type
## 1 Day 56 Combined
## 2 Day 56 Combined
## 3 Day 56 Combined
##                                                         variable
## 1                                       Sum of all observed OTUs
## 2                              Sum of previously unobserved OTUs
## 3 Fraction of community composed of\npreviously unobserved OTUs 
##          value
## 1 2.869000e+03
## 2 9.200000e+01
## 3 3.206692e-02

Beta NTI

Are the selective pressures resulting in nestedness and turnover related to phylogenetic composition / functional niche of the microbes? Beta NTI shows us if phylogenetic turnover is caused by niche-based processes.

The following code was provided by Emily B. Graham and used with her guidance and permission. We have modified it slightly. It is related to script from James Stegen’s script from ISME 2013, which is available on GitHub.

run_beta_nti <- function(phylo, otu, reps = 999){
  #phylo is phylogentic tree
  #otu is otu matrix
  match.phylo.otu.trim = match.phylo.data(phylo, t(otu))#trims tree to only otus in otu table

  beta.type = 'bNTI';#set to calculate bNTI
  beta.reps = reps;#set reps

  emp.weighted.neighbor.trim =
    as.matrix(comdistnt(t(match.phylo.otu.trim$data),
                      cophenetic(match.phylo.otu.trim$phy),
                      abundance.weighted=T,
                      exclude.conspecifics = F))
  ## empirical mean neighbor
  #print(emp.weighted.neighbor.trim)


  rand.weighted.neighbor.comp.trim =
    array(c(-999), dim=c(nrow(otu), nrow(otu), beta.reps))
  #print(dim(rand.weighted.neighbor.comp.trim))

  for (b.rep in 1:beta.reps) {
      rand.weighted.neighbor.comp.trim[,,b.rep] =
        as.matrix(comdistnt(t(match.phylo.otu.trim$data),
                            taxaShuffle(cophenetic(match.phylo.otu.trim$phy)),
                            abundance.weighted=T, exclude.conspecifics = F))
    print(c(b.rep, date()))
  }

  #lapply version of the above loop
  # lapply(1:beta.reps, function(b.rep){
  #   rand.weighted.neighbor.comp.trim[,,b.rep] =
  #     as.matrix(comdistnt(t(match.phylo.otu.trim$data),
  #                         taxaShuffle(cophenetic(match.phylo.otu.trim$phy)),
  #                         abundance.weighted=T, exclude.conspecifics = F))
  #   print(c(b.rep, date()))
  # })

  #print(rand.weighted.neighbor.comp.trim)

  ses.weighted.neighbor.trim = matrix(c(NA),nrow=nrow(otu),ncol=nrow(otu));

  for (columns in 1:(nrow(otu)-1)) {
    for (rows in (columns+1):nrow(otu)) {

      rand.vals = rand.weighted.neighbor.comp.trim[rows,columns,];
      ses.weighted.neighbor.trim[rows,columns] = (emp.weighted.neighbor.trim[rows,columns] - mean(rand.vals)) / sd(rand.vals);
      rm("rand.vals");

    };
  };

  rownames(ses.weighted.neighbor.trim) = rownames(otu)
  colnames(ses.weighted.neighbor.trim) = rownames(otu)

  print(ses.weighted.neighbor.trim[1:5, 1:5])
  return(ses.weighted.neighbor.trim)

}

# We will export these again, this time without filtering for singletons
rar.16s.cohort.ex <- rar.16s.cohort %>% filter_taxa(flist = function(x) max(x) > 0, prune = T) %>% phyloseq_to_picante # 2373 taxa

rar.16s.cohort.bnti <- run_beta_nti(rar.16s.cohort.ex@phy, rar.16s.cohort.ex@otu, 99)

rar.16s.cohort.bnti %>% write.csv("rar.16s.cohort.bnti.csv", F, F, row.names = T, col.names = T)
## Warning in write.csv(., "rar.16s.cohort.bnti.csv", F, F, row.names = T, :
## attempt to set 'col.names' ignored

Repeat this for 18S.

# We will export these again, this time without filtering for singletons
rar.18s.cohort.ex <- rar.18s.cohort %>% filter_taxa(flist = function(x) max(x) > 0, prune = T) %>% phyloseq_to_picante # 2373 taxa

rar.18s.cohort.bnti <- run_beta_nti(rar.18s.cohort.ex@phy, rar.18s.cohort.ex@otu, 99)

rar.18s.cohort.bnti[20:25,20:25]

rar.18s.cohort.bnti %>% write.csv("rar.18s.cohort.bnti.csv", F, F, row.names = T, col.names = T)
## Warning in write.csv(., "rar.18s.cohort.bnti.csv", F, F, row.names = T, :
## attempt to set 'col.names' ignored

beta NTI graphs for 16s

rar.16s.cohort.bnti <- read.csv("rar.16s.cohort.bnti.csv", row.names = 1)

rar.16s.cohort.bnti[20:25,20:25]
##                 T5.6      T5.4     T3.20      T5.20 T2..16.20 T6.16
## T5.6              NA        NA        NA         NA        NA    NA
## T5.4      -0.9469721        NA        NA         NA        NA    NA
## T3.20     -0.1218622 -1.234790        NA         NA        NA    NA
## T5.20     -0.4483249 -2.669578 -2.245963         NA        NA    NA
## T2..16.20 -1.4267727 -1.517025 -1.014725 -2.5271593        NA    NA
## T6.16     -2.1458720 -1.524544 -1.611228 -0.5635819 -1.462898    NA
rar.16s.cohort.bnti.melt <- rar.16s.cohort.bnti %>% as.dist() %>% distmelt() %>% fix_rows()
## Using rowname as id variables
rar.16s.cohort.bnti.melt$Type <- "16S"

rar.16s.cohort.bnti.melt %>% arrange(-value) %>% head
##   rowname variable    value row_cat var_cat Type
## 1    T4.2     T5.3 5.513957      35      56  16S
## 2    T4.2     T5.1 3.542289      35      56  16S
## 3    T4.2     T5.2 3.541350      35      56  16S
## 4    T3.6     T4.2 3.242118      14      35  16S
## 5    T5.2    T6.14 3.183142      56      79  16S
## 6    T4.2     T4.8 2.908797      35      35  16S
rar.16s.cohort.bnti.melt %>%
  ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster() +
  theme(strip.background = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        legend.position = c(.95,.4)) +
  labs(y = "Sampling Day", fill = "beta NTI") +
  facet_grid(var_cat ~ Type + row_cat, scales = "free", switch = "y") +
  scale_fill_gradient2(limits = c(-6, 6))

# This shows low turnover, just like we have already shown.

beta NTI graphs for 18s

rar.18s.cohort.bnti <- read.csv("rar.18s.cohort.bnti.csv", row.names = 1)
rar.18s.cohort.bnti.melt <- rar.18s.cohort.bnti %>% as.dist() %>% distmelt() %>% fix_rows()
## Using rowname as id variables
rar.18s.cohort.bnti.melt$Type <- "18S"

rar.18s.cohort.bnti.melt %>% arrange(abs(value)) %>% tail
##      rowname variable     value row_cat var_cat Type
## 2273   T5.18    T6.13  9.666890      56      79  18S
## 2274   T5.11    T6.15  9.780349      56      79  18S
## 2275   T5.16    T6.13 10.146818      56      79  18S
## 2276   T5.12    T6.15 10.486892      56      79  18S
## 2277   T4.22    T6.15 10.601450      35      79  18S
## 2278   T4.22    T6.14 11.709075      35      79  18S
rar.18s.cohort.bnti.melt %>%
  ggplot(aes(y = variable, x = rowname, fill = value)) + geom_raster() +
  theme(strip.background = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        legend.position = c(.95,.4)) +
  labs(y = "Sampling Day", fill = "beta NTI") +
  facet_grid(var_cat ~ Type + row_cat, scales = "free", switch = "y") +
  scale_fill_gradient2(limits = c(-6, 6))

Within-sample BetaNTI plot

Part of the nestedness / turnover figure.

rar.16s.cohort.bnti.melt$microbe <- "16S"
rar.18s.cohort.bnti.melt$microbe <- "18S"
bnti.melt <- rbind(rar.16s.cohort.bnti.melt, rar.18s.cohort.bnti.melt)

bnti.melt %>% filter(row_cat == var_cat) %>% dplyr::select(value) %>% summary
##      value        
##  Min.   :-5.1428  
##  1st Qu.:-1.8078  
##  Median :-0.2849  
##  Mean   :-0.3026  
##  3rd Qu.: 1.1998  
##  Max.   : 7.7333
bnti.melt %>% filter(microbe == "16S" & row_cat == var_cat) %>%
  ggplot(aes(y = value, x = as.numeric(as.character(var_cat)))) +
  geom_hline(yintercept = c(0, 2, -2), color = "blue", alpha = 0.6) +
  geom_boxplot(aes(group = var_cat), outlier.alpha = 0) + geom_jitter(width = .5, alpha = .1) +
  geom_smooth(color = "#6DCD59", method = "lm", size = 1, se = F) +
  #facet_grid(~microbe) +
  labs(x = "Bacteria Sampling Day", y = "BetaNTI") +
  scale_y_continuous(limits = c(-6, 9)) +
  theme(strip.background = element_blank())

bnti.melt %>% filter(microbe == "18S" & row_cat == var_cat) %>%
  ggplot(aes(y = value, x = as.numeric(as.character(var_cat)))) +
  geom_hline(yintercept = c(0, 2, -2), color = "blue", alpha = 0.6) +
  geom_boxplot(aes(group = var_cat), outlier.alpha = 0) + geom_jitter(width = .5, alpha = .1) +
  geom_smooth(color = "#9F2F7F", method = "lm", size = 1, se = F) +
  #facet_grid(~microbe) +
  labs(x = "Eukaryotes Sampling Day", y = "BetaNTI") +
  scale_y_continuous(limits = c(-6, 9)) +
  theme(strip.background = element_blank())

GLM, with with all vs all pairwise post-hoc Tukey test

bnti.melt %>%
  filter(microbe == "16S" & row_cat == var_cat) %>%
  glm(value ~ var_cat, "gaussian", .) %>%
  glht(linfct = mcp(var_cat = "Tukey")) %>%
  summary %>% tidy %>% kable
lhs rhs estimate std.error statistic p.value
56 - 79 0 0.3213748 0.1513273 2.1237072 0.1874383
35 - 79 0 1.2251661 0.1559846 7.8544048 0.0000000
14 - 79 0 1.8147354 0.1752171 10.3570693 0.0000000
8 - 79 0 1.5869692 0.5508396 2.8810006 0.0271561
35 - 56 0 0.9037913 0.1559846 5.7941062 0.0000000
14 - 56 0 1.4933606 0.1752171 8.5229169 0.0000000
8 - 56 0 1.2655944 0.5508396 2.2975734 0.1280678
14 - 35 0 0.5895693 0.1792548 3.2890023 0.0074551
8 - 35 0 0.3618032 0.5521372 0.6552776 0.9612650
8 - 14 0 -0.2277662 0.5578757 -0.4082741 0.9933514
bnti.melt %>%
  filter(microbe == "18S" & row_cat == var_cat) %>%
  glm(value ~ var_cat, "gaussian", .) %>%
  glht(linfct = mcp(var_cat = "Tukey")) %>%
  summary %>% tidy %>% kable
lhs rhs estimate std.error statistic p.value
56 - 79 0 0.9143921 0.2146524 4.2598730 0.0001669
35 - 79 0 1.1241736 0.2102610 5.3465631 0.0000006
14 - 79 0 1.0291424 0.2198064 4.6820398 0.0000176
8 - 79 0 2.0919544 0.6402689 3.2673059 0.0081350
35 - 56 0 0.2097814 0.1781064 1.1778436 0.7424418
14 - 56 0 0.1147503 0.1892804 0.6062449 0.9708025
8 - 56 0 1.1775623 0.6304415 1.8678375 0.3067817
14 - 35 0 -0.0950312 0.1842853 -0.5156742 0.9839322
8 - 35 0 0.9677808 0.6289599 1.5387005 0.5072353
8 - 14 0 1.0628120 0.6322149 1.6810929 0.4154283

BetaNTI vs our founders species

For each of our Founders Species, let’s see how their mean abundances changes with betaNTI. A positive slope (high abundance with high betaNTI), means that the microbe is most successful in a stochastic, high turnover environment. A negative slope (high abundance with negative betaNTI), means that the microbe is most successful in a stable environment with low turnover.

# It turns out that getting the mean values from all pairs in a vector is
# really hard. Here is the solution I like. This is stylistically dplyr / TidyR

# select.16s.melt is our starting object, which is the founders species from the
# line plots, melted into a data frame.
# select.16s.melt %>% head

# First, we will group_by to denote that each sample appears once for each microbe,
# then we will select the Sample, Abundance, and (implicitly) the Taxonomy columns.
s.a.16s <- select.16s.melt %>% group_by(Taxonomy) %>% dplyr::select(Sample1 = Sample, Abundance)
## Adding missing grouping variables: `Taxonomy`
s.a.16s
## # A tibble: 426 x 3
## # Groups:   Taxonomy [6]
##                                        Taxonomy Sample1 Abundance
##  *                                        <chr>   <chr>     <dbl>
##  1 Phormidesmiales, Nodosilineaceae, uncultured    T5.3 0.6658730
##  2 Phormidesmiales, Nodosilineaceae, uncultured    T4.8 0.6401574
##  3 Phormidesmiales, Nodosilineaceae, uncultured    T4.9 0.6332071
##  4 Phormidesmiales, Nodosilineaceae, uncultured   T4.21 0.6247886
##  5 Phormidesmiales, Nodosilineaceae, uncultured    T4.4 0.6242189
##  6 Phormidesmiales, Nodosilineaceae, uncultured    T4.6 0.6118007
##  7 Phormidesmiales, Nodosilineaceae, uncultured   T3.16 0.6032009
##  8 Phormidesmiales, Nodosilineaceae, uncultured   T4.11 0.5892154
##  9 Phormidesmiales, Nodosilineaceae, uncultured   T4.15 0.5877039
## 10 Phormidesmiales, Nodosilineaceae, uncultured   T4.17 0.5818333
## # ... with 416 more rows
# Look at that! It's now a tibble, and also we renamed the Sample to Sample1

# Let's a make a copy of this object...
s.a.16s.dup <- s.a.16s
names(s.a.16s.dup) <- c("Taxonomy", "Sample2", "Abundance2")
# ... and also rename these to Sample2 and Abidance2.

select.16s.mean <-
  s.a.16s %>%
  expand(Sample1, Sample2 = Sample1) %>% # generate all pairs of sample1 (but save the second pair as sample2!!!)
  inner_join(s.a.16s) %>%        # join in Abundance for Sample1
  inner_join(s.a.16s.dup) %>%    # join in Abundance2 for Sample2
  mutate(abmean = (Abundance + Abundance2) / 2 ) %>% # calculate the mean of these two values.
  dplyr::select(Taxonomy, rowname = Sample1, variable = Sample2, abmean) # drop extra columns and rename
## Joining, by = c("Taxonomy", "Sample1")
## Joining, by = c("Taxonomy", "Sample2")
select.16s.mean
## # A tibble: 30,246 x 4
## # Groups:   Taxonomy [6]
##                                      Taxonomy   rowname  variable
##                                         <chr>     <chr>     <chr>
##  1 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15 T2..11.15
##  2 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15 T2..16.20
##  3 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15   T2..2.5
##  4 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15  T2..6.10
##  5 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15      T3.1
##  6 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15     T3.10
##  7 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15     T3.12
##  8 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15     T3.13
##  9 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15     T3.14
## 10 Balneolales, Balneolaceae, Rhodohalobacter T2..11.15     T3.15
## # ... with 30,236 more rows, and 1 more variables: abmean <dbl>
# Admire that it worked. Thank you Hadley.

# Repeat for 18S
s.a.18s <- select.18s.melt %>% group_by(Taxonomy) %>% dplyr::select(Sample1 = Sample, Abundance)
## Adding missing grouping variables: `Taxonomy`
s.a.18s.dup <- s.a.18s
names(s.a.18s.dup) <- c("Taxonomy", "Sample2", "Abundance2")

select.18s.mean <-
  s.a.18s %>%
  expand(Sample1, Sample2 = Sample1) %>% # generate all pairs of sample1 (but save the second pair as sample2!!!)
  inner_join(s.a.18s) %>%        # join in Abundance for Sample1
  inner_join(s.a.18s.dup) %>%    # join in Abundance2 for Sample2
  mutate(abmean = (Abundance + Abundance2) / 2 ) %>% # calculate the mean of these two values.
  dplyr::select(Taxonomy, rowname = Sample1, variable = Sample2, abmean) # drop extra columns and rename
## Joining, by = c("Taxonomy", "Sample1")
## Joining, by = c("Taxonomy", "Sample2")
# 18S is done!

The ‘median of mean’ is really inelegant. The fact that each point represents a pair of samples is super strange.

Let’s recalculate the median RA so it’s simple and it matches the line graphs better.

# Subset and summarize betaNTI
select.16s.bNTImed <- select.16s.mean %>%
  left_join(bnti.melt) %>%
  filter(row_cat == var_cat) %>%
  group_by(Taxonomy, Day = factor(row_cat)) %>%
  summarise(betaNTImedian = median(value))
## Joining, by = c("rowname", "variable")
# Subset and summarize abundance
select.16s.abmed <- select.16s.melt %>%
  group_by(Taxonomy, Day) %>%
  summarise(AbundanceMedian = median(Abundance))

# Merge and graph
inner_join(select.16s.bNTImed, select.16s.abmed) %>%
  ggplot(aes(betaNTImedian, AbundanceMedian, label = Day)) +
  geom_vline(xintercept = 0, color = "red", alpha = 0.2) +
  geom_point() + geom_smooth(method = "lm") +
  geom_label(nudge_y = 0.2) +
  facet_wrap(~Taxonomy, ncol = 2) +
  labs(x = "median betaNTI", y = "median relative abundances") #, title = "16S: Within each timepoint")
## Joining, by = c("Taxonomy", "Day")

# Stat test on slope
inner_join(select.16s.bNTImed, select.16s.abmed) %>%
  do(tidy(lm(data = ., AbundanceMedian ~ betaNTImedian))) %>%
  filter(term == "betaNTImedian") %>% kable()
## Joining, by = c("Taxonomy", "Day")
Taxonomy term estimate std.error statistic p.value
Balneolales, Balneolaceae, Rhodohalobacter betaNTImedian -0.1431361 0.0346217 -4.1342852 0.0256805
Bradymonadales, Bradymonadaceae, uncultured bacterium betaNTImedian -0.0534381 0.0145308 -3.6775634 0.0348162
Nostocales, Geitlerinemaceae, Geitlerinema PCC-7105 betaNTImedian 0.0076632 0.0137712 0.5564672 0.6167054
Oceanospirillales, Saccharospirillaceae, Saccharospirillum betaNTImedian -0.1342905 0.0316192 -4.2471153 0.0239142
Phormidesmiales, Nodosilineaceae, uncultured betaNTImedian 0.2097324 0.1045604 2.0058494 0.1385388
Sphingomonadales, Sphingomonadaceae, Erythrobacter betaNTImedian 0.0209233 0.0238043 0.8789712 0.4441306
# Subset and summarize betaNTI
select.18s.bNTImed <- select.18s.mean %>%
  left_join(bnti.melt) %>%
  filter(row_cat == var_cat) %>%
  group_by(Taxonomy, Day = factor(row_cat)) %>%
  summarise(betaNTImedian = median(value))
## Joining, by = c("rowname", "variable")
# Subset and summarize abundance
select.18s.abmed <- select.18s.melt %>%
  group_by(Taxonomy, Day) %>%
  summarise(AbundanceMedian = median(Abundance))

# Merge and graph
inner_join(select.18s.bNTImed, select.18s.abmed) %>%
  ggplot(aes(betaNTImedian, AbundanceMedian, label = Day)) +
  geom_vline(xintercept = 0, color = "red", alpha = 0.2) +
  geom_point() + geom_smooth(method = "lm") +
  geom_label(nudge_y = 0.2) +
  facet_wrap(~Taxonomy, ncol = 2) +
  labs(x = "median betaNTI", y = "median relative abundances") #, title = "18S: Within each timepoint")
## Joining, by = c("Taxonomy", "Day")

# Stat test on slope
inner_join(select.18s.bNTImed, select.18s.abmed) %>%
  do(tidy(lm(data = ., AbundanceMedian ~ betaNTImedian))) %>%
  filter(term == "betaNTImedian") %>% kable()
## Joining, by = c("Taxonomy", "Day")
Taxonomy term estimate std.error statistic p.value
Metazoa (Animalia), Copepoda, Calanoida betaNTImedian 0.0702060 0.0356541 1.969087 0.1435745
Metazoa (Animalia), Podocopa, Podocopida betaNTImedian 0.1010565 0.0196389 5.145736 0.0142240
Ochrophyta, Bacillariophyceae, Nitzschia betaNTImedian -0.5997895 0.1228067 -4.884013 0.0164132
Ochrophyta, Bacillariophyceae, Pinnularia betaNTImedian 0.0531515 0.0190046 2.796775 0.0680348
Ochrophyta, Bacillariophyceae, Sellaphora betaNTImedian 0.1607111 0.0480729 3.343069 0.0442863